/////////////////////////////////////////////////////////////////////////////
*Figure 2: Gender Wage and Employment gap by family status
/////////////////////////////////////////////////////////////////////////////

//Outcomes to loop over: employment, log wage, and the regular-job indicator named in regvar
local regvar regular1
local outcomes working logwage `regvar'
foreach var of local outcomes {

	//Reload the working data on every pass, because each pass adds variables to it.
	//A forward slash is used as the directory separator: Stata accepts it on every
	//platform, whereas a backslash is Windows-only.
	use Data/klips_workingdata_hh, clear

	//Sample restriction: survey years 2010-19 and ages 25-54 only
	//(inrange returns false for missing values, same as the paired inequalities)
	keep if inrange(years, 2010, 2019) & inrange(age, 25, 54)



//Cross-sectional weight for pooled multi-year analysis of the 25-54 sample.
//Each base weight is rescaled to sum to 100 within every year, so that no year
//counts more than another in a multi-year regression.
//The rescaled weights are stored WITHOUT the temp_ prefix: they are read again
//after the temp_ helpers have been dropped, so they must survive that drop.
//Any copy already present in the data is removed first, so that the weight is
//always recomputed on the restricted sample.
foreach weightvar in _weight09_ _weight18_ {
	capture drop `weightvar'_new
	bysort years: egen temp_tot`weightvar' = total(`weightvar')
	gen `weightvar'_new = 100*`weightvar'/temp_tot`weightvar'
}
//Only the yearly totals are scratch variables
drop temp_*

//Use the 2009-based weight before 2018 and the 2018-based weight from 2018 onwards
gen _weight2554_ = _weight09__new if years>=2009 & years<2018
replace _weight2554_ = _weight18__new if years>=2018



//Weighting and variance options shared by every regression in this pass
local weight "[pw=_weight2554_]"
local stderrors "robust cluster(pid)"

//Start from a clean slate: no stored estimates, and empty scratch macros
estimates clear
foreach scratch in a b c regvars {
	local `scratch'
}


	
//Family-status categories, stored in the variable dependents:
//   0 = child present (any of havegivenbirth, childreninhousehold, hh_child equals 1)
//   1 = no child, and the oldest household member is above the age cutoff
//   2 = never married, no child, nobody above the age cutoff
//  -9 = everyone else (filled in below so that no observation stays missing)
local age 65
local haschild (havegivenbirth==1 | childreninhousehold==1 | hh_child==1)
local nochild  (havegivenbirth!=1 & childreninhousehold!=1 & hh_child!=1)

//Elder flag is 1 or missing, never 0, hence the !=1 test for category 2
gen hh_over`age' = 1 if hh_maxage>`age' & !missing(hh_maxage)

gen dependents = 0 if `haschild'
replace dependents = 1 if `nochild' & hh_over`age'==1
replace dependents = 2 if evermarried==0 & `nochild' & hh_over`age'!=1
local cond dependents

//Residual category
replace dependents = -9 if missing(dependents)

//Weighted distribution of the categories among women
//NOTE(review): this tabulation is weighted by _weight_, not by the rescaled _weight2554_ - confirm that is intended
tabulate dependents if female==1 [aw=_weight_]

//Female x family-status indicators, pooled over all ages
//(female_dm is the indicator for the residual category, dependents equal to -9)
foreach cat of numlist 0/2 {
	gen female_d`cat' = (female==1 & dependents==`cat')
}
gen female_dm = (female==1 & dependents==-9)

//The same indicators split into five-year age bands, 25-29 through 50-54
forvalues lo = 25(5)50 {
	local hi = `lo' + 4
	foreach cat of numlist 0/2 {
		gen female_d`cat'_`lo'_`hi' = (female==1 & dependents==`cat' & inrange(age, `lo', `hi'))
	}
	gen female_dm_`lo'_`hi' = (female==1 & dependents==-9 & inrange(age, `lo', `hi'))
}




//Outcome-specific settings, kept in globals that the regression and plotting commands read:
//   depvar   - dependent variable (always the current loop variable)
//   axes     - x-axis range and ticks, identical for all three panels
//   label    - outcome name used in the x-axis title
//   controls - covariate list
//   title    - panel title (must be set after label, which it embeds)
global depvar `var'
global axes xsc(r(-0.55 0.15 )) xlabel(-0.5 (0.1) 0.1)

//Panel A: employment
if "`var'"=="working" {
	global label employment
	global controls i.age i.years i.edu i.residence
	global title A: $label
}

//Panel B: log wage, with additional job controls
if "`var'"=="logwage" {
	global label log wage
	global controls  i.years i.age i.edu i.residence i.ind i.occ worktime i.status i.union i.workplace
	global title B: $label
}

//Panel C: regular job
if "`var'"=="`regvar'" {
	global label regular job
	global controls i.age i.years i.edu i.residence i.ind i.occ
	global title C: $label
}





//Each specification is estimated three times, once per family-status category.
//In every pass the category of interest is copied into female_main, so that
//coefplot can draw each category as its own series with its own colour.
//This repetition is not needed if only a regression table is wanted.
//The full set of category indicators stays in the model, so female_main
//duplicates one of them; presumably Stata omits the duplicate as collinear
//and reports the coefficient under female_main - verify in the output.
//Every group of women is compared against ALL men.

//---- All ages pooled ----
forvalues cat = 0/2 {
	capture drop female_main
	gen female_main = female_d`cat'
	label variable female_main "{bf: All ages}"
	local regvars female_main female_dm female_d0 female_d1 female_d2

	display "All ages "
	quietly regress $depvar `regvars' $controls `weight', `stderrors'
	estimates store e_`cat'_all
}

//---- By five-year age band ----
forvalues cat = 0/2 {
	//Rebuild the regressor list from scratch for every category
	local regvars
	forvalues lo = 25(5)50 {
		local hi = `lo' + 4
		local band `lo'_`hi'
		capture drop female_main_`band'
		gen female_main_`band' = female_d`cat'_`band'
		label variable female_main_`band' "`lo'-`hi'"
		local regvars `regvars' female_main_`band' female_dm_`band' female_d0_`band' female_d1_`band' female_d2_`band'
	}

	display "By age "
	quietly regress $depvar `regvars' $controls `weight', `stderrors'
	estimates store e_`cat'_byage
}

	
//Regression table
//Shows only the female_main coefficients: one column per family-status category,
//first for the all-ages models, then for the by-age models.
esttab e_0_all e_1_all e_2_all , keep(female_main)
esttab e_0_byage e_1_byage e_2_byage , keep(female_main*)

//Coefficient plot
//Three series, drawn in this order and vertically offset from each other:
//   category 2 (cranberry), category 0 (blue), category 1 (green).
//Category 1 is plotted from the all-ages model only; its by-age model is not included.
//Only the female_main coefficients are kept in the plot.
//The legend picks keys 2, 4 and 6, i.e. one key per series in the order above
//(presumably the marker keys, with the odd keys being the confidence intervals - confirm).
//The two horizontal lines at 2 and 2.1 presumably separate the all-ages row
//from the by-age rows - confirm against the rendered figure.
//The panel is saved to a temporary graph file named after the current outcome.
 coefplot ///
(e_2_all e_2_byage, mcolor(cranberry) ciopts(lcolor(cranberry%30) lwidth(thick)) offset(0.20)) ///
(e_0_all e_0_byage,  mcolor(blue*1.5) ciopts(lcolor(blue*1.5%30) lwidth(thick)) offset(0)) ///
(e_1_all , mcolor(green*0.7) ciopts(lcolor(green*0.7%30) lwidth(thick)) offset(-0.20)) ///
 , keep(female_m*) ///
 legend(pos(6) cols(3)   order(2 "Never married, no child, no over-`age'"  4 "Child in household, no over-`age'" 6 "Over-`age' in household, no child"  )) ///
  $axes  xline(0, lwidth(thin) lcolor(red)) ///
 title("$title") xtitle("Gender $label gap," "2010-19 (men = 0)") ///
 ylabel(, angle(45)) headings(female_main_25_29 = "{bf: By age}", angle(45)) ///
 yline(2, lwidth(thin) lpattern(solid)) yline(2.1, lwidth(thin) lpattern(solid))
 graph save temp_`var'_bydependents_byage.gph, replace

 //End of the loop over outcomes
 }

 
 //Figure 2: combine the three saved panels side by side, sharing the legend of the log-wage panel
local panels
foreach outcome in working logwage `regvar' {
	local panels `panels' temp_`outcome'_bydependents_byage.gph
}
grc1leg `panels', cols(3) leg(temp_logwage_bydependents_byage.gph)

//Forward slash is used as the directory separator so the path resolves on every platform
graph export Figures/Figure2.pdf, replace

//Erase intermediate graphs
foreach panel of local panels {
	erase `panel'
}

